library(tidyverse)
library(knitr)
library(kableExtra)
library(janitor)
library(scales)
library(ggExtra)
library(ggrepel)
remotes::install_github("hughjonesd/ggmagnify")
library(ggmagnify)
library(ggplot2)
library(devtools)
devtools::install_github("psyteachr/introdataviz")
library(introdataviz)
library(survival)
library(lattice)
library(Hmisc)
library(report)
devtools::install_github("thomasp85/patchwork")
library(patchwork)Load the processed data and apply the global exclusions.
data_processed <- read_csv("../data/processed/data_processed.csv")
data_processed_after_exclusions <- data_processed |>
filter(exclude_participant == "include")data_processed |>
count(name = "n") |>
kable() |>
add_header_above(header = c("Whole sample" = 1)) |> # note that you can add header rows to tables like this. The "1" indicates the number of columns the header should span. The sum of these numbers must equal the number of columns or you'll get an error.
kable_classic(full_width = FALSE)| n |
|---|
| 102 |
Sample used in subsequent analyses
data_processed_after_exclusions |>
count(name = "n") |>
kable() |>
add_header_above(header = c("For analysis" = 1)) |>
kable_classic(full_width = FALSE)| n |
|---|
| 90 |
data_processed_after_exclusions |>
mutate(age = as.numeric(age)) |>
summarise(Mean = mean(age, na.rm = TRUE),
SD = sd(age, na.rm = TRUE)) |>
mutate_all(.funs = janitor::round_half_up, digits = 1) |>
kable() |>
add_header_above(header = c("Age" = 2)) |>
kable_classic(full_width = FALSE)| Mean | SD |
|---|---|
| 36.8 | 12.4 |
data_processed_after_exclusions |>
rename(Gender = gender) |>
group_by(Gender) |>
summarise(n = n()) |>
mutate(Percent = paste0(round_half_up((n / sum(n)) * 100, 1), "%")) |>
mutate(Gender = stringr::str_to_sentence(Gender)) |> # Change the case of the Gender variable so that it prints nicely
kable() |>
kable_classic(full_width = FALSE)| Gender | n | Percent |
|---|---|---|
| Female | 39 | 43.3% |
| Male | 48 | 53.3% |
| Nonbinary | 3 | 3.3% |
Descriptive statistics and plots of the measures (excluding the demographics variables)
# overall self-reported evaluations
dat_mean_ratings <- data_processed_after_exclusions |>
summarise(Mean = mean(mean_evaluation, na.rm = TRUE),
SD = sd(mean_evaluation, na.rm = TRUE),
n = n()) |>
mutate(group = "Full sample")
# self-reported evaluations by gender category
dat_mean_ratings_by_gender <- data_processed_after_exclusions |>
group_by(group = gender) |>
summarise(Mean = mean(mean_evaluation, na.rm = TRUE),
SD = sd(mean_evaluation, na.rm = TRUE),
n = n())
# combine both into one table
bind_rows(dat_mean_ratings,
dat_mean_ratings_by_gender) |>
select(Subset = group, Mean, SD, n) |> # select variables of interest, and rename one
mutate(Subset = stringr::str_to_sentence(Subset)) |> # Change the case of the Subset variable so that it prints nicely
mutate_if(is.numeric, round_half_up, digits = 2) |>
kable() |>
add_header_above(header = c(" " = 1, "Self-reported evaluations" = 3)) |>
kable_classic(full_width = FALSE)| Subset | Mean | SD | n |
|---|---|---|---|
| Full sample | 1.64 | 1.02 | 90 |
| Female | 1.32 | 0.71 | 39 |
| Male | 1.81 | 1.03 | 48 |
| Nonbinary | 3.33 | 2.19 | 3 |
ggplot(data_processed_after_exclusions, aes(x = mean_evaluation)) +
geom_histogram(binwidth = 1,
boundary = 0,
fill = viridis_pal(begin = 0.45, option = "mako")(1),
color = viridis_pal(begin = 0.30, option = "mako")(1)) +
xlab("Mean self-reported evaluation") +
ylab("Frequency") +
theme_linedraw() +
scale_x_continuous(breaks = pretty_breaks(n = 7)) +
coord_cartesian(xlim = c(1, 7)) +
theme(panel.grid.minor = element_blank())add table of means, SDs, Ns
# overall AMP score
dat_AMP_score <- data_processed_after_exclusions |>
summarise(Mean = mean(AMP_score, na.rm = TRUE),
SD = sd(AMP_score, na.rm = TRUE),
n = n()) |>
mutate(group = "Full sample")
# AMP score by gender category
dat_AMP_score_by_gender <- data_processed_after_exclusions |>
group_by(group = gender) |>
summarise(Mean = mean(AMP_score, na.rm = TRUE),
SD = sd(AMP_score, na.rm = TRUE),
n = n())
# combine both into one table
bind_rows(dat_AMP_score,
dat_AMP_score_by_gender) |>
select(Subset = group, Mean, SD, n) |> # select variables of interest, and rename one
mutate(Subset = stringr::str_to_sentence(Subset)) |> # Change the case of the Subset variable so that it prints nicely
mutate_if(is.numeric, round_half_up, digits = 2) |>
kable() |>
add_header_above(header = c(" " = 1, "AMP score" = 3)) |>
kable_classic(full_width = FALSE)| Subset | Mean | SD | n |
|---|---|---|---|
| Full sample | 0.58 | 0.19 | 90 |
| Female | 0.58 | 0.21 | 39 |
| Male | 0.58 | 0.18 | 48 |
| Nonbinary | 0.55 | 0.06 | 3 |
ggplot(data_processed_after_exclusions, aes(x = AMP_score)) +
geom_histogram(binwidth = 0.05,
boundary = 0,
fill = viridis_pal(begin = 0.45, option = "mako")(1),
color = viridis_pal(begin = 0.30, option = "mako")(1)) +
xlab("AMP score") +
ylab("Frequency") +
theme_linedraw() +
scale_x_continuous(breaks = pretty_breaks(n = 10))# Multiple versions
# Binwidth = 0.1, starting point not one
ggplot(data_processed_after_exclusions, aes(AMP_score)) +
geom_histogram(binwidth = 0.1,
fill = "white",
colour = "black")# Binwidth = 0.1, starting point 0
ggplot(data_processed_after_exclusions, aes(AMP_score)) +
geom_histogram(binwidth = 0.1,
fill = "white",
colour = "black",
boundary = 0.05) +
xlab("AMP score") +
ylab("Frequency") +
theme_linedraw()# seperate for gender
p1 <- ggplot(data_processed_after_exclusions, aes(AMP_score, fill = gender)) +
geom_histogram(binwidth = 0.1,
boundary = 0,
colour = "black") +
xlab("AMP score") +
ylab("Frequency") +
theme_linedraw()
p1split histogram, split violin plot, raincloud plot, etc.
Split histogram plot (for all genders)
# All in one histogram ("stacked")
ggplot(data_processed_after_exclusions, aes(x = mean_evaluation)) +
geom_histogram(aes(fill = gender),
binwidth = 1,
boundary = 0,
color = "black") +
xlab("Mean self-reported evaluation") +
ylab("Frequency") +
theme_linedraw() +
scale_x_continuous(breaks = pretty_breaks(n = 7)) +
coord_cartesian(xlim = c(1, 7)) +
theme(panel.grid.minor = element_blank())# All in one histogram ("dodged")
ggplot(data_processed_after_exclusions, aes(x = mean_evaluation,
group = gender,
fill = gender))+
geom_histogram(position="dodge",binwidth=0.4) +
theme_bw()# Separate histograms next to eachother
ggplot(data_processed_after_exclusions, aes(x = mean_evaluation)) +
geom_histogram() +
facet_grid(~gender) +
theme_bw()Split histogram plot (only men and women)
# Data without nonbinary
data_processed_after_exclusions_H2 <- data_processed_after_exclusions %>%
filter(gender != "nonbinary")
# All in one histogram ("stacked")
ggplot(data_processed_after_exclusions_H2, aes(x = mean_evaluation)) +
geom_histogram(aes(fill = gender),
binwidth = 1,
boundary = 0,
color = "black") +
xlab("Mean self-reported evaluation") +
ylab("Frequency") +
theme_linedraw() +
scale_x_continuous(breaks = pretty_breaks(n = 7)) +
coord_cartesian(xlim = c(1, 7)) +
theme(panel.grid.minor = element_blank())# All in one histogram ("dodged")
ggplot(data_processed_after_exclusions_H2, aes(x = mean_evaluation,
group = gender,
fill = gender))+
geom_histogram(position="dodge",binwidth=0.4) +
theme_bw()# Separate histograms next to eachother
ggplot(data_processed_after_exclusions_H2, aes(x = mean_evaluation)) +
geom_histogram() +
facet_grid(~gender) +
theme_bw()Split violin plot (seperate for all genders)
ggplot(data_processed_after_exclusions, aes(x = gender, y = mean_evaluation,
fill = gender)) +
introdataviz::geom_split_violin(alpha = .4, trim = FALSE) +
geom_boxplot(width = .2, alpha = .6, fatten = NULL, show.legend = FALSE) +
stat_summary(fun.data = "mean_se", geom = "pointrange", show.legend = F,
position = position_dodge(.175)) +
scale_x_discrete(name = "Gender", labels = c("female", "male", "nonbinary")) +
scale_y_continuous(name = "self-reported evaluations",
breaks = seq(0, 7, 1),
limits = c(0, 7)) +
scale_fill_brewer(palette = "Dark2", name = "Gender") +
theme_minimal()Split violin plot (only men and women)
ggplot(data_processed_after_exclusions_H2, aes(x ="", y = mean_evaluation,
fill = gender)) +
introdataviz::geom_split_violin(alpha = .4, trim = FALSE) +
geom_boxplot(width = .2, alpha = .6, fatten = NULL, show.legend = FALSE) +
stat_summary(fun.data = "mean_se", geom = "pointrange", show.legend = F,
position = position_dodge(.175)) +
scale_y_continuous(name = "self-reported evaluations",
breaks = seq(0, 7, 1),
limits = c(0, 7)) +
scale_fill_brewer(palette = "Dark2", name = "Gender") +
theme_minimal()Split raincloud plot (only men and women)
rain_height <- .1
ggplot(data_processed_after_exclusions_H2, aes(x = "", y = mean_evaluation,
fill = gender)) +
# clouds
introdataviz::geom_flat_violin(trim=FALSE, alpha = 0.4,
position = position_nudge(x = rain_height+.05)) +
# rain
geom_point(aes(colour = gender), size = 2, alpha = .5, show.legend = FALSE,
position = position_jitter(width = rain_height, height = 0)) +
# boxplots
geom_boxplot(width = rain_height, alpha = 0.4, show.legend = FALSE,
outlier.shape = NA,
position = position_nudge(x = -rain_height*2)) +
# mean and SE point in the cloud
stat_summary(fun.data = mean_cl_normal, mapping = aes(color = gender), show.legend = FALSE,
position = position_nudge(x = rain_height * 3)) +
# adjust layout
scale_x_discrete(name = "", expand = c(rain_height*3, 0, 0, 0.7)) +
scale_y_continuous(name = "mean self-reported evaluation",
breaks = seq(0, 7, 1),
limits = c(0, 7)) +
coord_flip() +
facet_wrap(~factor(gender,
levels = c("male", "female"),
labels = c("male", "female")),
nrow = 2) +
# custom colours and theme
scale_fill_brewer(palette = "Dark2", name = "Gender") +
scale_colour_brewer(palette = "Dark2") +
theme_minimal() +
theme(panel.grid.major.y = element_blank(),
legend.position = c(0.8, 0.8),
legend.background = element_rect(fill = "white", color = "white"))As a test statistic, I use the unpaired two-samples t-test to compare the mean self-reported evaluation of the two independent groups (men and women). I chose to test two-sided, as there is no indication why one sex should achieve higher values than the other; no directional hypothesis exists.
t.test(data_processed_after_exclusions_H2$mean_evaluation~data_processed_after_exclusions_H2$gender,
var.equal = TRUE,
alternative = "two.sided")##
## Two Sample t-test
##
## data: data_processed_after_exclusions_H2$mean_evaluation by data_processed_after_exclusions_H2$gender
## t = -2.5221, df = 85, p-value = 0.01353
## alternative hypothesis: true difference in means between group female and group male is not equal to 0
## 95 percent confidence interval:
## -0.8750556 -0.1035768
## sample estimates:
## mean in group female mean in group male
## 1.316239 1.805556
Interpretation:
The t-test consists of the following two hypothesis:
Null hypothesis (H0): The means for the two populations are equal.
Alternative hypothesis (H1) : The means for the two populations are not equal.
As the p-value (0.014) is less than our specified significance level (0.05), we can reject the null hypothesis. This means that the difference between the two means is statistically significant. The sample provides strong enough evidence to conclude that the two population means are not equal. Men and women differ statistically significant in their mean self-reported evaluation. This is further shown in the fact that the 95% confidence interval does not contain 0. The mean self-reported evaluation of men (1.81) is significantly higher than the mean self-reported evaluation of women (1.32).
split histogram, split violin plot, raincloud plot, etc.
This time, vary the labeling and order of the legend, e.g., capitalise “Men” and “Women”, and know how to change the order of the factors.
Split histogram plot (only men and women)
# Data without nonbinary
data_processed_after_exclusions_H3 <- data_processed_after_exclusions %>%
filter(gender != "nonbinary")
# All in one histogram ("stacked")
ggplot(data_processed_after_exclusions_H3, aes(x = AMP_score)) +
geom_histogram(aes(fill = gender),
binwidth = 0.1,
boundary = 0,
color = "black") +
xlab("Mean AMP score") +
ylab("Frequency") +
theme_linedraw() +
scale_x_continuous(breaks = pretty_breaks(n = 10)) +
coord_cartesian(xlim = c(0, 1)) +
theme(panel.grid.minor = element_blank())# All in one histogram ("dodged")
ggplot(data_processed_after_exclusions_H3, aes(x = AMP_score,
group = gender,
fill = gender))+
geom_histogram(position="dodge",binwidth=0.1) +
theme_bw()# Separate histograms next to eachother
ggplot(data_processed_after_exclusions_H3, aes(x = AMP_score)) +
geom_histogram() +
facet_grid(~gender) +
theme_bw()Split violin plot (only men and women)
p2 <- ggplot(data_processed_after_exclusions_H3, aes(x ="", y = AMP_score,
fill = gender)) +
introdataviz::geom_split_violin(alpha = .4, trim = FALSE) +
geom_boxplot(width = .2, alpha = .6, fatten = NULL, show.legend = FALSE) +
stat_summary(fun.data = "mean_se", geom = "pointrange", show.legend = F,
position = position_dodge(.175)) +
scale_y_continuous(name = "mean AMP score",
breaks = seq(0, 1, 0.100),
limits = c(0, 1)) +
scale_fill_brewer(palette = "Dark2", name = "Gender") +
theme_minimal()
p2Split raincloud plot (only men and women)
rain_height <- .1
p3 <- ggplot(data_processed_after_exclusions_H3, aes(x = "", y = AMP_score,
fill = gender)) +
# clouds
introdataviz::geom_flat_violin(trim=FALSE, alpha = 0.4,
position = position_nudge(x = rain_height+.05)) +
# rain
geom_point(aes(colour = gender), size = 2, alpha = .5, show.legend = FALSE,
position = position_jitter(width = rain_height, height = 0)) +
# boxplots
geom_boxplot(width = rain_height, alpha = 0.4, show.legend = FALSE,
outlier.shape = NA,
position = position_nudge(x = -rain_height*2)) +
# mean and SE point in the cloud
stat_summary(fun.data = mean_cl_normal, mapping = aes(color = gender), show.legend = FALSE,
position = position_nudge(x = rain_height * 3)) +
# adjust layout
scale_x_discrete(name = "", expand = c(rain_height*3, 0, 0, 0.7)) +
scale_y_continuous(name = "mean AMP score",
breaks = seq(0, 1, 0.100),
limits = c(0, 1)) +
coord_flip() +
facet_wrap(~factor(gender,
levels = c("Male", "Female"),
labels = c("Male", "Female")),
nrow = 2) +
# custom colours and theme
scale_fill_brewer(palette = "Dark2", name = "Gender") +
scale_colour_brewer(palette = "Dark2") +
theme_minimal() +
theme(panel.grid.major.y = element_blank(),
legend.position = c(0.8, 0.8),
legend.background = element_rect(fill = "white", color = "white"))
p3run an appropriate test. Below the output, print an interpretation of the results generated by the “easystats” package
https://easystats.github.io/report/)
As a test statistic, I use the unpaired two-samples t-test to compare the mean AMP score of the two independent groups (men and women). I chose to test two-sided, as there is no indication why one sex should achieve higher values than the other; no directional hypothesis exists.
t.test(data_processed_after_exclusions_H3$AMP_score~data_processed_after_exclusions_H2$gender,
var.equal = TRUE,
alternative = "two.sided")##
## Two Sample t-test
##
## data: data_processed_after_exclusions_H3$AMP_score by data_processed_after_exclusions_H2$gender
## t = 0.074348, df = 85, p-value = 0.9409
## alternative hypothesis: true difference in means between group female and group male is not equal to 0
## 95 percent confidence interval:
## -0.08136332 0.08768454
## sample estimates:
## mean in group female mean in group male
## 0.5847578 0.5815972
Interpretation:
The t-test consists of the following two hypothesis:
Null hypothesis (H0): The means for the two populations are equal.
Alternative hypothesis (H1) : The means for the two populations are not equal.
As the p-value (0.94) is greater than our specified significance level (0.05), we can’t reject the null hypothesis. This means that the difference between the two means is not statistically significant. The sample does not provide strong enough evidence to conclude that the two population means are not equal. Men and women do not differ statistically significant in their mean AMP score. This is further shown in the fact that the 95% confidence interval does contain 0. The mean AMP score of women (0.585) is not significantly higher than the mean AMP score of men (0.582).
report(t.test(data_processed_after_exclusions_H3$AMP_score ~ data_processed_after_exclusions_H3$gender,
var.equal = TRUE,
alternative = "two.sided"))## Effect sizes were labelled following Cohen's (1988) recommendations.
##
## The Two Sample t-test testing the difference of
## data_processed_after_exclusions_H3$AMP_score by
## data_processed_after_exclusions_H3$gender (mean in group female = 0.58, mean in
## group male = 0.58) suggests that the effect is positive, statistically not
## significant, and very small (difference = 3.16e-03, 95% CI [-0.08, 0.09], t(85)
## = 0.07, p = 0.941; Cohen's d = 0.02, 95% CI [-0.41, 0.44])
Combine plots using patchwork: https://patchwork.data-imaginist.com/
Combine at least three of the above plots into one.
combined_AMP <- (p1 | p2) /
p3
combined_AMPSave plots to disk with ggsave()
Save the above combined plot to disk as both .png and .pdf. Ensure the png has at least 300dpi resolution.
ggsave("combined_AMP.pdf")
ggsave("combined_AMP.png")It tells everything about the instances of r that created this work. Useful for recreation of the code and the results. There is reproducability over time. Prints at the end of the HTML file.
sessionInfo()## R version 4.1.3 (2022-03-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur/Monterey 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] patchwork_1.1.3.9000 report_0.5.7 Hmisc_5.1-0
## [4] lattice_0.20-45 survival_3.2-13 introdataviz_0.0.0.9003
## [7] devtools_2.4.5 usethis_2.1.6 ggmagnify_0.2.0.9000
## [10] ggrepel_0.9.4 ggExtra_0.10.1 scales_1.2.0
## [13] janitor_2.2.0 kableExtra_1.3.4 knitr_1.39
## [16] forcats_0.5.1 stringr_1.4.0 dplyr_1.1.0
## [19] purrr_1.0.1 readr_2.1.2 tidyr_1.2.0
## [22] tibble_3.1.8 ggplot2_3.3.6 tidyverse_1.3.2
##
## loaded via a namespace (and not attached):
## [1] googledrive_2.0.0 colorspace_2.0-3 ellipsis_0.3.2
## [4] snakecase_0.11.1 htmlTable_2.4.1 parameters_0.21.3
## [7] base64enc_0.1-3 fs_1.5.2 rstudioapi_0.13
## [10] farver_2.1.0 remotes_2.4.2.1 bit64_4.0.5
## [13] fansi_1.0.3 lubridate_1.9.2 xml2_1.3.3
## [16] splines_4.1.3 cachem_1.0.6 pkgload_1.3.2
## [19] Formula_1.2-5 jsonlite_1.8.0 broom_0.8.0
## [22] cluster_2.1.2 dbplyr_2.3.0 effectsize_0.8.6
## [25] shiny_1.7.4 compiler_4.1.3 httr_1.4.2
## [28] backports_1.4.1 assertthat_0.2.1 Matrix_1.5-1
## [31] fastmap_1.1.0 gargle_1.3.0 cli_3.6.0
## [34] later_1.3.0 htmltools_0.5.4 prettyunits_1.1.1
## [37] tools_4.1.3 gtable_0.3.0 glue_1.6.2
## [40] Rcpp_1.0.8.3 cellranger_1.1.0 jquerylib_0.1.4
## [43] vctrs_0.5.2 nlme_3.1-155 svglite_2.1.0
## [46] insight_0.19.7 xfun_0.41 ps_1.7.0
## [49] rvest_1.0.2 timechange_0.2.0 mime_0.12
## [52] miniUI_0.1.1.1 lifecycle_1.0.3 googlesheets4_1.0.1
## [55] vroom_1.5.7 ragg_1.2.5 hms_1.1.1
## [58] promises_1.2.0.1 parallel_4.1.3 RColorBrewer_1.1-3
## [61] yaml_2.3.5 curl_4.3.2 memoise_2.0.1
## [64] gridExtra_2.3 sass_0.4.1 rpart_4.1.16
## [67] stringi_1.7.6 bayestestR_0.13.1 highr_0.9
## [70] checkmate_2.1.0 pkgbuild_1.4.0 rlang_1.0.6
## [73] pkgconfig_2.0.3 systemfonts_1.0.4 evaluate_0.15
## [76] labeling_0.4.2 htmlwidgets_1.6.1 bit_4.0.4
## [79] tidyselect_1.2.0 processx_3.8.0 plyr_1.8.8
## [82] magrittr_2.0.3 R6_2.5.1 generics_0.1.2
## [85] profvis_0.3.7 DBI_1.1.2 mgcv_1.8-39
## [88] pillar_1.7.0 haven_2.5.1 foreign_0.8-82
## [91] withr_2.5.0 datawizard_0.9.0 nnet_7.3-17
## [94] modelr_0.1.10 crayon_1.5.1 utf8_1.2.2
## [97] tzdb_0.3.0 rmarkdown_2.14 urlchecker_1.0.1
## [100] grid_4.1.3 readxl_1.4.2 data.table_1.14.2
## [103] callr_3.7.3 reprex_2.0.2 digest_0.6.31
## [106] webshot_0.5.3 xtable_1.8-4 httpuv_1.6.9
## [109] textshaping_0.3.6 munsell_0.5.0 viridisLite_0.4.0
## [112] bslib_0.3.1 sessioninfo_1.2.2